library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## ── Attaching packages ───────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## Warning: package 'purrr' was built under R version 4.0.3
## Warning: package 'stringr' was built under R version 4.0.3
## ── Conflicts ──────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(MOFA2)
##
## Attaching package: 'MOFA2'
## The following object is masked from 'package:stats':
##
## predict
library(here)
## here() starts at /home/rstudio/promise
#library(MOFAdata)
library(ReactomePA)
##
## ReactomePA v1.34.0 For help: https://guangchuangyu.github.io/ReactomePA
##
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
library(biomaRt)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
library(enrichplot)
library(dplyr)
library(clusterProfiler)
## clusterProfiler v3.18.1 For help: https://guangchuangyu.github.io/software/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:biomaRt':
##
## select
## The following object is masked from 'package:purrr':
##
## simplify
## The following object is masked from 'package:stats':
##
## filter
# input
model <- load_model(here("models/mofa/model.hdf5"))
# gene expression data
promise_long_filtered_top <- readRDS(here::here('data/processed/expression/promise_expr_filtered_tidy_top.rds'))
# organoid morphology
umap_df <- readRDS(here::here("data/processed/PhenotypeSpectrum/umap_absolute_all_drugs_sampled.Rds"))
# organoid size
organoid_size_fit <- readRDS(here::here("data/processed/morphology/organoid_size.Rds")) %>%
filter(!line %in% c('D055T01', 'D020T02', 'D021T01')) %>%
#filter(!line %in% c('D055T01','D020T02')) %>%
mutate(line = as.character(line)) %>%
dplyr::select(line, size = x, rep = replicate) %>%
distinct() %>% arrange(line) %>%
mutate(line = substr(line, 1, 4)) %>%
mutate(rep = paste0("r", rep))
# morphology classification
organoid_morphology <- read_delim(here::here("references/imaging/visual_classification_organoids.csv"), ";", escape_double = FALSE, trim_ws = TRUE) %>%
dplyr::select(line = organoid, morphology = visual_inspection_v2) %>%
mutate(line = substr(line, 1, 4)) %>%
mutate(morphology = if_else(is.na(morphology), "other", morphology))
## Parsed with column specification:
## cols(
## organoid = col_character(),
## visual_inspection_morphology_2017 = col_character(),
## visual_class_2_2017 = col_double(),
## visual_inspection_v2 = col_character(),
## visual_inspection_size_2017 = col_character(),
## visual_class_1_2017 = col_double(),
## visual_size_ranking_2018 = col_double(),
## visual_cystic_ranking_2018 = col_double(),
## clustering_jan = col_character()
## )
# drug activity data
data('aucroc', package = 'SCOPEAnalysis')
drug_activity <- aucroc %>% filter(!line %in% c('D055T01', 'D021T01', 'D054T01'))
# gene expression annotation
intestinal_sig <- readxl::read_excel(here('data/external/expression/merloz-suarez_sigantures.xls'),
sheet = 1, skip = 4) %>% .[,1:4] %>%
gather(signature, symbol) %>% drop_na() %>%
mutate(symbol = gsub('\\*', '', symbol))
## New names:
## * `` -> ...5
cris_sig <- readxl::read_excel(here('data/external/expression/cris/41467_2017_BFncomms15107_MOESM422_ESM.xlsx'), sheet = 1, skip = 2) %>%
dplyr::rename(symbol = `Gene ID`, signature = `CRIS Class`)
# organoid mutation
mofa_genetics <- read_delim(here::here("data/processed/mutation/Table-S2_Mutations_PDOs_RevisionV4.csv"), delim = ";") %>%
janitor::clean_names() %>%
mutate(sample = substr(sample, 2,nchar(sample)-1)) %>%
mutate(sample = paste0("D", sample)) %>%
dplyr::filter(!sample %in% c("D021", "D015")) %>%
expand_grid(replicate = c("r1", "r2")) %>%
mutate(sample = paste0(sample, "_", replicate)) %>%
dplyr::select(sample, feature = symbol, everything()) %>% dplyr::select(sample, feature) %>%
mutate(value = 1) %>%
complete(sample, feature, fill = list(value = 0)) %>%
distinct(sample, feature, value) %>%
mutate(view = "mutation")
## Parsed with column specification:
## cols(
## SAMPLE = col_character(),
## SYMBOL = col_character(),
## Protein_position = col_character(),
## Amino_acids = col_character(),
## Consequence = col_character()
## )
weights <- model@expectations$Z$single_group %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_size <- model@expectations$W$size_view %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_expression <- model@expectations$W$expression %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_morphology <- model@expectations$W$morphology %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_drug_activity <- model@expectations$W$drug_activity %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_mutation <- model@expectations$W$mutation %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
plot_data_overview(model) +
ggsave(here::here("reports/figures/mofa/data_overview.pdf"))
## Saving 7 x 5 in image
df <- head(model@cache$variance_explained$r2_total[[1]]) %>%
as.data.frame() %>%
rownames_to_column("feature") %>%
magrittr::set_colnames(c("feature", "var")) %>%
mutate(var = var/100)
df %>%
ggplot(aes(feature, var)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_cowplot() +
labs(y = "R2") +
ggsave(here::here("reports/figures/mofa/var_explained_feature.pdf"))
## Saving 7 x 5 in image
Breakdown by factors
model@cache$variance_explained$r2_per_factor$single_group
## drug_activity expression morphology_view mutation size_view
## Factor1 16.33660 14.65125 1.005191 13.997723 3.923018e+01
## Factor2 12.25793 13.05513 15.602844 8.586737 6.894016e-02
## Factor3 12.84796 10.37584 7.765737 3.413264 -2.116287e-06
model@cache$variance_explained$r2_per_factor$single_group %>% as.data.frame() %>%
rownames_to_column("factor") %>%
mutate(overall_variance = expression + morphology_view + size_view + drug_activity) %>%
mutate(overall_variance = overall_variance/4) %>%
ggplot(aes(factor, overall_variance)) +
geom_point(size = 2) +
theme_cowplot()
gg_ve <- plot_variance_explained(model, x="view", y="factor")
gg_ve + coord_flip() +
#coord_equal() +
ggsave(here::here("reports/figures/mofa/var_explained.pdf"))
## Saving 7 x 5 in image
ph_cor <- weights %>%
as.data.frame() %>%
column_to_rownames("id") %>%
cor() %>%
pheatmap::pheatmap()
weights %>%
as.data.frame() %>%
column_to_rownames("id") %>%
cor() %>% as.vector() %>%
.[. != 1] %>%
summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.114913 -0.084456 0.006913 -0.028706 0.018139 0.021880
plot_factor(model,
factor = 1:3
)
umap_factor <- weights %>%
separate(id, c("line", "replicate"), sep = "_") %>%
mutate(line = paste0(line, "T01"),
replicate = substr(replicate, 2,2)) %>%
left_join(umap_df %>%
filter(drug == "DMSO"), .) %>%
filter(line != "D020T02")
## Joining, by = c("line", "replicate")
gg_factor_umap <- umap_factor %>%
dplyr::select(-size_factor) %>%
pivot_longer(cols = contains("factor"), names_to = "number", values_to = "value") %>%
ggplot(aes(v1, v2, color = value)) +
ggrastr::geom_point_rast(alpha = 0.1, size = 0.35) +
scale_color_viridis_c() +
cowplot::theme_cowplot() +
labs(x = "UMAP 1",
y = "UMAP 2",
color = "factor value") +
facet_wrap(~ number) +
theme(legend.position = "bottom") +
coord_fixed()
gg_factor_umap + ggsave(here("reports/figures/mofa/factor_overview.pdf"), height = 6 , width = 6)
gg_f12 <- weights %>%
dplyr::rename(line = id) %>%
left_join(organoid_size_fit %>% mutate(line = paste0(line, "_", rep))) %>%
left_join(organoid_morphology %>%
mutate(line = substr(line, 1, 4)) %>%
expand_grid(., rep = c("r1", "r2")) %>%
mutate(sample = paste0(line, "_", rep)) %>% dplyr::select(-line, line = sample)) %>%
distinct() %>%
ggplot(aes(factor1, factor2, label = line, color = size, shape = morphology)) +
geom_point(size = 4) +
ggrepel::geom_text_repel(color = "black") +
scale_color_viridis_c() +
theme_cowplot() +
coord_fixed()
## Joining, by = "line"
## Joining, by = c("line", "rep")
gg_f12 + ggsave(here("reports/figures/mofa/factor_12_plot.pdf"), height = 6 , width = 6)
gg_f23 <- weights %>%
dplyr::rename(line = id) %>%
left_join(organoid_size_fit %>% mutate(line = paste0(line, "_", rep))) %>%
ggplot(aes(factor1, factor3, label = line)) +
geom_point(size = 2) +
ggrepel::geom_text_repel(color = "black") +
scale_color_viridis_c() +
theme_cowplot() +
theme_cowplot() +
coord_fixed()
## Joining, by = "line"
gg_f23 + ggsave(here("reports/figures/mofa/factor_23_plot.pdf"), height = 5 , width = 5)
loadings_mutation %>%
column_to_rownames("id") %>%
pheatmap::pheatmap()
df <- loadings_mutation %>%
gather(key = "factor", value = "weight", -id)
df %>%
ggplot(aes(factor, weight, label = id)) +
geom_point() +
theme_cowplot() +
ggrepel::geom_text_repel(data = df %>% filter(id %in% c("NRAS", "ERBB2", "PTEN", "KRAS", "PIK3CA")))
## load drug annotation
drug_anno <- readxl::read_excel(here::here('references/layouts/Compound_Annotation_Libraries_New.xlsx')) %>% distinct(drug = `Compound name`, target = `Primary Target`)
loadings_drug_activity_tidy <- loadings_drug_activity %>%
mutate(drug = substr(id, 7, nchar(.))) %>%
dplyr::select(-id) %>%
arrange(desc(factor1)) %>%
left_join(drug_anno)
## Joining, by = "drug"
#
# top_n <- 20
#
# top_drugs <- rbind(
# loadings_drug_activity_tidy %>% arrange(factor1) %>% head(top_n),
# loadings_drug_activity_tidy %>% arrange(factor1) %>% tail(top_n),
# loadings_drug_activity_tidy %>% arrange(factor2) %>% head(top_n),
# loadings_drug_activity_tidy %>% arrange(factor2) %>% tail(top_n),
# loadings_drug_activity_tidy %>% arrange(factor3) %>% head(top_n),
# loadings_drug_activity_tidy %>% arrange(factor3) %>% tail(top_n)
# ) %>% distinct()
#
# top_drugs %>%
# column_to_rownames("drug") %>%
# pheatmap::pheatmap()
df <- loadings_drug_activity_tidy %>% dplyr::select(-target) %>%
gather(factor_name, value, -drug) %>%
distinct()
row_anno <- df %>% left_join(loadings_drug_activity_tidy) %>% dplyr::select(drug, target) %>%
mutate(id = paste0(drug, "_", target)) %>% distinct() %>% as.data.frame() %>% column_to_rownames("id")
## Joining, by = "drug"
df %>%
spread(drug, value) %>%
dplyr::select(-factor_name) %>%
cor() %>%
as.matrix() %>%
pheatmap::pheatmap(show_rownames = FALSE,
show_colnames = FALSE)
drug_activity_test <- loadings_drug_activity_tidy %>% semi_join(loadings_drug_activity_tidy %>%
dplyr::count(target) %>%
filter(n >=5))
## Joining, by = "target"
drug_activity_test_result <- rbind(
lm(factor1 ~ target, data = drug_activity_test) %>% summary() %>% broom::tidy() %>% mutate(factor = "factor1"),
lm(factor2 ~ target, data = drug_activity_test) %>% summary() %>% broom::tidy() %>% mutate(factor = "factor2"),
lm(factor3 ~ target, data = drug_activity_test) %>% summary() %>% broom::tidy() %>% mutate(factor = "factor3"))
df <- drug_activity_test_result %>%
filter(term != "(Intercept)") %>%
dplyr::select(term, statistic, factor) %>%
mutate(term = substr(term, 7, nchar(term)))
df %>%
spread(factor, statistic) %>%
column_to_rownames("term") %>%
pheatmap::pheatmap()
drug_activity_test_result %>%
mutate(fdr = p.adjust(p.value, method = "BH")) %>%
filter(fdr <= 0.2) %>%
filter(term != "(Intercept)") %>%
arrange(factor, statistic)
## # A tibble: 6 x 7
## term estimate std.error statistic p.value factor fdr
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 targetEGFR -0.522 0.102 -5.11 8.39e-7 facto… 2.64e-5
## 2 targetMEK -0.269 0.0956 -2.81 5.50e-3 facto… 6.93e-2
## 3 targetSrc 0.250 0.103 2.42 1.66e-2 facto… 1.70e-1
## 4 targetWnt/beta-ca… 0.353 0.117 3.02 2.94e-3 facto… 4.64e-2
## 5 targetEGFR 0.275 0.0894 3.08 2.44e-3 facto… 4.64e-2
## 6 targetAurora Kina… 0.574 0.0894 6.42 1.28e-9 facto… 8.09e-8
# top 100 genes
loadings_expression %>% arrange(desc(factor1)) %>% head(100) %>%
dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>%
arrange(desc(id)) %>%
dplyr::select(id) %>% write_csv(here::here("reports/tables/factor1_top.csv"))
loadings_expression %>% arrange(desc(factor2)) %>% head(100) %>%
dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(id) %>% write_csv(here::here("reports/tables/factor2_top.csv"))
loadings_expression %>% arrange(desc(factor3)) %>% head(100) %>%
dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(id) %>% write_csv(here::here("reports/tables/factor3_top.csv"))
# all genes
loadings_expression %>% arrange(desc(factor1)) %>%
dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>%
arrange(desc(id)) %>%
dplyr::select(id, factor1) %>% write_csv(here::here("reports/tables/factor1_all.csv"))
loadings_expression %>% arrange(desc(factor2)) %>%
dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(id, factor2) %>% write_csv(here::here("reports/tables/factor2_all.csv"))
loadings_expression %>% arrange(desc(factor3)) %>%
dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(id, factor3) %>% write_csv(here::here("reports/tables/factor3_all.csv"))
weights %>%
dplyr::rename(line = id) %>%
left_join(organoid_size_fit %>% mutate(line = paste0(line, "_", rep))) %>%
left_join(organoid_morphology %>%
mutate(line = substr(line, 1, 4)) %>%
expand_grid(., rep = c("r1", "r2")) %>%
mutate(sample = paste0(line, "_", rep)) %>% dplyr::select(-line, line = sample)) %>%
distinct() %>%
ggplot(aes(factor1, size, label = line)) +
geom_smooth(method = "lm", se = FALSE) +
geom_point(size = 4) +
ggrepel::geom_text_repel(color = "black") +
scale_color_viridis_c() +
labs(y = "expected size") +
theme_cowplot()
## Joining, by = "line"
## Joining, by = c("line", "rep")
## `geom_smooth()` using formula 'y ~ x'
plot_weights(model,
view = "expression",
factor = 1,
nfeatures = 10, # Number of features to highlight
scale = T, # Scale weights from -1 to 1
abs = F # Take the absolute value?
)
## Warning: Ignoring unknown parameters: max.overlaps
growth control via IGF signaling cell adhesion via FYN signaling, high fibronectin expression DLX- TGFb and BMP inhibiting transcription factor
df <- loadings_expression %>%
separate(id, c("symbol"), sep = "_exp") %>%
filter(symbol != "") %>%
left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
arrange(desc(factor1)) %>%
mutate(order = nrow(.):1)
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
df %>%
ggplot(aes(order, factor1)) +
geom_point()+
geom_point(data = df %>% filter(symbol %in% c("H19", "IGF2")),
color = "blue") +
theme_cowplot()
df <- loadings_expression %>%
separate(id, c("symbol"), sep = "_exp") %>%
filter(symbol != "") %>%
left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
arrange(desc(factor1))
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
ranks_1 <- setNames(df$factor1, as.character(df$entrez))
ranks_1 <- sort(ranks_1, decreasing = T)
## Reactome enrichment analysis
gse_reactome_1 <- gsePathway(
geneList = ranks_1,
organism = 'human',
#nPerm = 1e5,
#minGSSize = 100,
#maxGSSize = 500,
pvalueCutoff = 0.2
)
## Loading required package: org.Hs.eg.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## leading edge analysis...
## done...
reactome <- pairwise_termsim(gse_reactome_1)
emapplot(reactome, color = "NES")
gse_go <- gseGO(
geneList = ranks_1,
OrgDb = org.Hs.eg.db,
ont = 'BP',
# nPerm = 1e5,
# minGSSize = 10,
# maxGSSize = 500,
pvalueCutoff = 0.2
)
go <- pairwise_termsim(gse_go)
emapplot(go, color = "NES")
# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(-id) %>%
left_join(promise_long_filtered_top) %>%
dplyr::select(factor1, symbol) %>% distinct() %>%
arrange(desc(factor1))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor1, as.character(df$symbol))
gse_sig <- GSEA(
geneList = ranks_symbol,
TERM2GENE = intestinal_sig,
nPerm = 1e5,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_sig_tbl <- as_tibble(gse_sig)
## proliferation
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[1],
title = paste0(gse_sig$ID[1],
' (p = ', round(gse_sig_tbl$pvalue[1], 3),
'; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)
## late TA signature
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[2],
title = paste0(gse_sig$ID[2],
' (p = ', round(gse_sig_tbl$pvalue[2], 3),
'; NES = ', round(gse_sig_tbl$NES[2], 2), ')')
)
## LGR5
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[3],
title = paste0(gse_sig$ID[3],
' (p = ', round(gse_sig_tbl$pvalue[3], 3),
'; NES = ', round(gse_sig_tbl$NES[3], 2), ')')
)
## EPH
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[4],
title = paste0(gse_sig$ID[4],
' (p = ', round(gse_sig_tbl$pvalue[1], 3),
'; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)
# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(-id) %>%
left_join(promise_long_filtered_top) %>%
dplyr::select(factor1, symbol) %>% distinct() %>%
arrange(desc(factor1))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor1, as.character(df$symbol))
gse_cris <- GSEA(
geneList = ranks_symbol,
TERM2GENE = cris_sig %>% dplyr::select(signature, symbol),
nPerm = 1e5,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_cris_tbl <- as_tibble(gse_cris)
gse_cris_tbl
## # A tibble: 5 x 11
## ID Description setSize enrichmentScore NES pvalue p.adjust qvalues
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 CRIS… CRIS-D 41 0.621 2.17 2.26e-5 0.000113 NA
## 2 CRIS… CRIS-C 75 -0.444 -1.68 1.27e-3 0.00317 NA
## 3 CRIS… CRIS-A 88 -0.399 -1.55 5.25e-3 0.00875 NA
## 4 CRIS… CRIS-E 48 -0.351 -1.22 1.56e-1 0.194 NA
## 5 CRIS… CRIS-B 40 0.249 0.866 7.03e-1 0.703 NA
## # … with 3 more variables: rank <dbl>, leading_edge <chr>,
## # core_enrichment <chr>
## proliferation
gseaplot2(
gse_cris, geneSetID = gse_cris$ID[1],
title = paste0(gse_cris$ID[1],
' (p = ', round(gse_cris_tbl$pvalue[1], 3),
'; NES = ', round(gse_cris_tbl$NES[1], 2), ')')
)
gseaplot2(
gse_cris, geneSetID = gse_cris$ID[2],
title = paste0(gse_cris$ID[2],
' (p = ', round(gse_cris_tbl$pvalue[2], 3),
'; NES = ', round(gse_cris_tbl$NES[2], 2), ')')
)
aggregate_drugs <- drug_activity_test %>% group_by(target) %>% summarise_at(vars(contains("factor")), funs("median"))
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
include_drugs <- aggregate_drugs %>% arrange(desc(factor1)) %>% .$target
drug_activity_test %>%
drop_na() %>%
mutate(target = factor(target, levels = include_drugs)) %>%
ggplot(aes(y = target, x = factor1)) +
#geom_point() +
ggridges::geom_density_ridges() +
geom_hline(yintercept = 0) +
#coord_flip() +
cowplot::theme_cowplot()
## Picking joint bandwidth of 0.11
median weighting within the factor was strongest for activity of MEK, IGF1R inhibitors. mTOR inhibitors ranked among the strongest drugs as well. In contrast, EGFR inhibitor activity had the most negative median contribution to the factor. Put differently, organoid lines that had a high score for factor 1 tended to be sensitive to MEK and IGF-1R inhibitors and less responsive to EGFR and Syk inhibitors.
EGFR inhibitor activity has a significant negative contribution to the factor. This means that lines with a high factor1 score, show little to no activity when treated with EGFR inhibitors.
drug_activity_joined <- weights %>%
mutate(id = substr(id, 1, 4)) %>%
group_by(id) %>%
summarise_all(funs(mean)) %>%
mutate(line = paste0(id, "T01")) %>%
left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>%
dplyr::filter(target == "EGFR") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor1, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
drug_activity_joined <- weights %>%
mutate(id = substr(id, 1, 4)) %>%
group_by(id) %>%
summarise_all(funs(mean)) %>%
mutate(line = paste0(id, "T01")) %>%
left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>%
dplyr::filter(target == "MEK") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor1, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
drug_activity_joined <- weights %>%
mutate(id = substr(id, 1, 4)) %>%
group_by(id) %>%
summarise_all(funs(mean)) %>%
mutate(line = paste0(id, "T01")) %>%
left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>%
dplyr::filter(target == "IGF-1R") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor1, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "black") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
drug_activity_joined <- weights %>%
mutate(id = substr(id, 1, 4)) %>%
group_by(id) %>%
summarise_all(funs(mean)) %>%
mutate(line = paste0(id, "T01")) %>%
left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>%
dplyr::filter(target == "mTOR") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor1, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "green") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
within the group of mTOR inhibitors, activity towards treatment with the small molecule WYE-132 had the strongest contribution to the factor. WYE-132 is an ATP competitive inhibitor of mTORC1 and mTORC2
Next we wondered how treatment with members from these highly active groups would change the phenotype of organoids
df <- mofa_genetics %>%
mutate(sample = substr(sample, 1, 4)) %>%
distinct() %>%
filter(feature %in% c("NRAS"))
drug_activity_joined %>%
dplyr::select(-line, sample = id) %>%
filter(drug %in% c("Selumetinib (AZD6244)")) %>%
left_join(df) %>%
drop_na() %>%
mutate(value = if_else(value == 0, "WT", "mut")) %>%
ggplot(aes(value, auroc)) +
geom_point() +
ggsignif::geom_signif(comparisons = list(c("WT", "mut"))) +
theme_cowplot() +
facet_grid(drug ~ feature)
## Joining, by = "sample"
weights %>%
dplyr::rename(line = id) %>%
left_join(organoid_morphology %>%
mutate(line = substr(line, 1, 4)) %>%
expand_grid(., rep = c("r1", "r2")) %>%
mutate(sample = paste0(line, "_", rep)) %>% dplyr::select(-line, line = sample)) %>%
distinct() %>%
ggplot(aes(factor1, factor2, label = line, color = morphology)) +
geom_point(size = 4) +
ggrepel::geom_text_repel(color = "black") +
theme_cowplot() +
scale_color_brewer(type = "qual")
## Joining, by = "line"
plot_weights(model,
view = "expression",
factor = 2,
nfeatures = 10, # Number of features to highlight
scale = T, # Scale weights from -1 to 1
abs = F # Take the absolute value?
)
## Warning: Ignoring unknown parameters: max.overlaps
growth control via IGF signaling cell adhesion via FYN signaling, high fibronectin expression DLX- TGFb and BMP inhibiting transcription factor
df <- loadings_expression %>%
separate(id, c("symbol"), sep = "_exp") %>%
filter(symbol != "") %>%
left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
arrange(desc(factor2))
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
ranks_2 <- setNames(df$factor2, as.character(df$entrez))
ranks_2 <- sort(ranks_2, decreasing = T)
## Reactome enrichment analysis
gse_reactome_2 <- gsePathway(
geneList = ranks_2,
organism = 'human',
#nPerm = 1e5,
#minGSSize = 100,
#maxGSSize = 500,
pvalueCutoff = 0.2
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## leading edge analysis...
## done...
reactome <- pairwise_termsim(gse_reactome_2)
emapplot(reactome, color = "NES")
gse_go <- gseGO(
geneList = ranks_2,
OrgDb = org.Hs.eg.db,
ont = 'BP',
# nPerm = 1e5,
# minGSSize = 10,
# maxGSSize = 500,
pvalueCutoff = 0.2
)
go <- pairwise_termsim(gse_go)
emapplot(go, color = "NES")
# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(-id) %>%
left_join(promise_long_filtered_top) %>%
dplyr::select(factor2, symbol) %>% distinct() %>%
arrange(desc(factor2))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor2, as.character(df$symbol))
gse_sig <- GSEA(
geneList = ranks_symbol,
TERM2GENE = intestinal_sig,
nPerm = 1e5,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_sig_tbl <- as_tibble(gse_sig)
## proliferation
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[1],
title = paste0(gse_sig$ID[1],
' (p = ', round(gse_sig_tbl$pvalue[1], 3),
'; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)
## lgr5 signature
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[2],
title = paste0(gse_sig$ID[2],
' (p = ', round(gse_sig_tbl$pvalue[2], 3),
'; NES = ', round(gse_sig_tbl$NES[2], 2), ')')
)
## proliferation
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[3],
title = paste0(gse_sig$ID[3],
' (p = ', round(gse_sig_tbl$pvalue[3], 3),
'; NES = ', round(gse_sig_tbl$NES[3], 2), ')')
)
## proliferation
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[4],
title = paste0(gse_sig$ID[4],
' (p = ', round(gse_sig_tbl$pvalue[4], 3),
'; NES = ', round(gse_sig_tbl$NES[4], 2), ')')
)
# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(-id) %>%
left_join(promise_long_filtered_top) %>%
dplyr::select(factor2, symbol) %>% distinct() %>%
arrange(desc(factor2))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor2, as.character(df$symbol))
gse_cris <- GSEA(
geneList = ranks_symbol,
TERM2GENE = cris_sig %>% dplyr::select(signature, symbol),
nPerm = 1e5,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_cris_tbl <- as_tibble(gse_cris)
gse_cris_tbl
## # A tibble: 5 x 11
## ID Description setSize enrichmentScore NES pvalue p.adjust qvalues
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 CRIS… CRIS-E 48 -0.657 -2.54 4.38e-5 0.000154 NA
## 2 CRIS… CRIS-A 88 -0.429 -1.87 6.15e-5 0.000154 NA
## 3 CRIS… CRIS-C 75 -0.362 -1.53 5.44e-3 0.00907 NA
## 4 CRIS… CRIS-D 41 0.456 1.44 3.90e-2 0.0487 NA
## 5 CRIS… CRIS-B 40 -0.251 -0.934 5.79e-1 0.579 NA
## # … with 3 more variables: rank <dbl>, leading_edge <chr>,
## # core_enrichment <chr>
## proliferation
gseaplot2(
gse_cris, geneSetID = gse_cris$ID[3],
title = paste0(gse_cris$ID[3],
' (p = ', round(gse_cris_tbl$pvalue[3], 5),
'; NES = ', round(gse_cris_tbl$NES[3], 2), ')')
)
## proliferation
gseaplot2(
gse_cris, geneSetID = gse_cris$ID[1],
title = paste0(gse_cris$ID[1],
' (p = ', round(gse_cris_tbl$pvalue[1], 5),
'; NES = ', round(gse_cris_tbl$NES[1], 2), ')')
)
gseaplot2(
gse_cris, geneSetID = gse_cris$ID[4],
title = paste0(gse_cris$ID[4],
' (p = ', round(gse_cris_tbl$pvalue[4], 5),
'; NES = ', round(gse_cris_tbl$NES[4], 2), ')')
)
aggregate_drugs <- drug_activity_test %>% group_by(target) %>% summarise_at(vars(contains("factor")), funs("median"))
include_drugs <- aggregate_drugs %>% arrange(desc(factor2)) %>% .$target
drug_activity_test %>%
drop_na() %>%
mutate(target = factor(target, levels = include_drugs)) %>%
ggplot(aes(y = target, x = factor2)) +
#geom_point() +
ggridges::geom_density_ridges() +
geom_hline(yintercept = 0) +
#coord_flip() +
cowplot::theme_cowplot()
## Picking joint bandwidth of 0.104
median weighting within the factor was strongest for activity of Wnt/bcatenin, Src, EGFR and FAK inhibitors. In contrast, MEK, ERK inhibitor activity had among the most negative median contribution to the factor, similarly mTOR inhibitors, albeit not statistically significant. Put differently, organoid lines that had a high score for factor 2 tended to be sensitive to Wnt and EGFR targeting and less responsive to MEK and ERK inhibitors.
drug_activity_joined <- weights %>%
mutate(id = substr(id, 1, 4)) %>%
group_by(id) %>%
summarise_all(funs(mean)) %>%
mutate(line = paste0(id, "T01")) %>%
left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>%
dplyr::filter(target == "EGFR") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor2, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
drug_activity_joined <- weights %>%
mutate(id = substr(id, 1, 4)) %>%
group_by(id) %>%
summarise_all(funs(mean)) %>%
mutate(line = paste0(id, "T01")) %>%
left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>%
dplyr::filter(target == "MEK") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor2, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
doi <- drug_activity_test %>%
dplyr::filter(target == "Wnt/beta-catenin") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor2, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "black") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
drug_activity_joined <- weights %>%
mutate(id = substr(id, 1, 4)) %>%
group_by(id) %>%
summarise_all(funs(mean)) %>%
mutate(line = paste0(id, "T01")) %>%
left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>%
dplyr::filter(target == "mTOR") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor2, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "green") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
again, within the group of mTOR inhibitors, activity towards treatment with the small molecule WYE-132 had the strongest contribution to the factor - showing less activity in factor 2 high organoids compared to factor 2 low organoid lines.
plot_weights(model,
view = "expression",
factor = 3,
nfeatures = 10, # Number of features to highlight
scale = T, # Scale weights from -1 to 1
abs = F # Take the absolute value?
)
## Warning: Ignoring unknown parameters: max.overlaps
NRX cell adhesion xenobiotic metabolism
df <- loadings_expression %>%
separate(id, c("symbol"), sep = "_exp") %>%
filter(symbol != "") %>%
left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
arrange(desc(factor3))
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
ranks_3 <- setNames(df$factor3, as.character(df$entrez))
ranks_3 <- sort(ranks_3, decreasing = T)
## Reactome enrichment analysis
gse_reactome_3 <- gsePathway(
geneList = ranks_3,
organism = 'human',
#nPerm = 1e5,
#minGSSize = 100,
#maxGSSize = 500,
pvalueCutoff = 0.2
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## leading edge analysis...
## done...
reactome <- pairwise_termsim(gse_reactome_3)
emapplot(reactome, color = "NES")
gse_go <- gseGO(
geneList = ranks_3,
OrgDb = org.Hs.eg.db,
ont = 'BP',
# nPerm = 1e5,
# minGSSize = 10,
# maxGSSize = 500,
pvalueCutoff = 0.2
)
go <- pairwise_termsim(gse_go)
emapplot(go, color = "NES")
# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(-id) %>%
left_join(promise_long_filtered_top) %>%
dplyr::select(factor3, symbol) %>% distinct() %>%
arrange(desc(factor3))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor3, as.character(df$symbol))
gse_sig <- GSEA(
geneList = ranks_symbol,
TERM2GENE = intestinal_sig,
nPerm = 1e5,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_sig_tbl <- as_tibble(gse_sig)
## proliferation
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[1],
title = paste0(gse_sig$ID[1],
' (p = ', round(gse_sig_tbl$pvalue[1], 3),
'; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)
## lgr5 signature
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[2],
title = paste0(gse_sig$ID[2],
' (p = ', round(gse_sig_tbl$pvalue[2], 3),
'; NES = ', round(gse_sig_tbl$NES[2], 2), ')')
)
## proliferation
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[3],
title = paste0(gse_sig$ID[3],
' (p = ', round(gse_sig_tbl$pvalue[3], 3),
'; NES = ', round(gse_sig_tbl$NES[3], 2), ')')
)
## proliferation
gseaplot2(
gse_sig, geneSetID = gse_sig$ID[4],
title = paste0(gse_sig$ID[4],
' (p = ', round(gse_sig_tbl$pvalue[4], 3),
'; NES = ', round(gse_sig_tbl$NES[4], 2), ')')
)
set.seed(1234)
# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
dplyr::select(-id) %>%
left_join(promise_long_filtered_top) %>%
dplyr::select(factor3, symbol) %>% distinct() %>%
arrange(desc(factor3))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor3, as.character(df$symbol))
gse_cris <- GSEA(
geneList = ranks_symbol,
TERM2GENE = cris_sig %>% dplyr::select(signature, symbol),
nPerm = 1e5,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_cris_tbl <- as_tibble(gse_cris)
gse_cris_tbl
## # A tibble: 5 x 11
## ID Description setSize enrichmentScore NES pvalue p.adjust qvalues rank
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <lgl> <dbl>
## 1 CRIS… CRIS-B 40 -0.650 -2.02 6.30e-5 0.000315 NA 246
## 2 CRIS… CRIS-A 88 -0.475 -1.70 3.27e-4 0.000818 NA 499
## 3 CRIS… CRIS-E 48 0.500 1.43 3.40e-2 0.0533 NA 708
## 4 CRIS… CRIS-C 75 0.446 1.36 4.26e-2 0.0533 NA 528
## 5 CRIS… CRIS-D 41 0.484 1.35 7.24e-2 0.0724 NA 564
## # … with 2 more variables: leading_edge <chr>, core_enrichment <chr>
## proliferation
gseaplot2(
gse_cris, geneSetID = gse_cris$ID[1],
title = paste0(gse_cris$ID[1],
' (p = ', round(gse_cris_tbl$pvalue[1], 3),
'; NES = ', round(gse_cris_tbl$NES[1], 2), ')')
)
gseaplot2(
gse_cris, geneSetID = gse_cris$ID[2],
title = paste0(gse_cris$ID[2],
' (p = ', round(gse_cris_tbl$pvalue[2], 3),
'; NES = ', round(gse_cris_tbl$NES[2], 2), ')')
)
aggregate_drugs <- drug_activity_test %>% group_by(target) %>% summarise_at(vars(contains("factor")), funs("median"))
include_drugs <- aggregate_drugs %>% arrange(desc(factor3)) %>% .$target
drug_activity_test %>%
drop_na() %>%
mutate(target = factor(target, levels = include_drugs)) %>%
ggplot(aes(y = target, x = factor3)) +
geom_vline(xintercept = 0) +
#geom_point() +
ggridges::geom_density_ridges() +
#coord_flip() +
cowplot::theme_cowplot()
## Picking joint bandwidth of 0.0945
median weighting within the factor was strongest for activity of AURK inhibitors. In contrast, GSK and EGFR inhibitor activity had the most negative median contribution to the factor. Put differently, organoid lines that had a high score for factor 3 tended to be sensitive to ARK targeting and less responsive to GSK and EGFR inhibitors.
doi <- drug_activity_test %>%
dplyr::filter(target == "EGFR") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor3, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
doi <- drug_activity_test %>%
dplyr::filter(target == "GSK-3") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor3, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
doi <- drug_activity_test %>%
dplyr::filter(target == "Aurora Kinase") %>%
.$drug
drug_activity_joined %>%
filter(drug %in% doi) %>%
ggplot(aes(factor3, auroc)) +
geom_point() +
cowplot::theme_cowplot() +
geom_smooth(method = "lm", se = FALSE, color = "black") +
facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
again, within the group of mTOR inhibitors, activity towards treatment with the small molecule WYE-132 had the strongest contribution to the factor - showing less activity in factor 2 high organoids compared to factor 2 low organoid lines.